# Import the libraries used throughout this notebook
import plotly.graph_objects as go
import pandas as pd
# Load the dataset used for visualization 1.
# This example reads a Titanic CSV file hosted on GitHub.
titanic_csv_url = (
    'https://raw.githubusercontent.com/plotly/datasets/'
    'master/titanic.csv'
)
df = pd.read_csv(titanic_csv_url)
# Show the first rows of the dataset
df.head()
| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
# First aggregation: count the non-null 'Name' entries for each
# (Pclass, Sex) pair and turn the group keys back into regular columns.
class_labels = {
    1: '1ª clase',
    2: '2ª clase',
    3: '3ª clase',
}
counts_by_class_and_sex = df.groupby(['Pclass', 'Sex'])['Name'].count()
df1 = counts_by_class_and_sex.reset_index()
# Rename the columns to the source/target/value layout used for the links
df1.columns = ['source', 'target', 'value']
# Replace the numeric class in 'source' with its display label
df1['source'] = df1['source'].map(class_labels)
df1.head()
| | source | target | value |
|---|---|---|---|
| 0 | 1ª clase | female | 9 |
| 1 | 1ª clase | male | 21 |
| 2 | 2ª clase | female | 12 |
| 3 | 2ª clase | male | 18 |
| 4 | 3ª clase | female | 35 |
# Second aggregation: count the non-null 'Name' entries for each
# (Sex, Survived) pair and turn the group keys back into regular columns.
survival_labels = {1: 'Survived', 0: 'Died'}
counts_by_sex_and_survival = df.groupby(['Sex', 'Survived'])['Name'].count()
df2 = counts_by_sex_and_survival.reset_index()
# Rename the columns to the same source/target/value layout
df2.columns = ['source', 'target', 'value']
# Replace the 0/1 flag in 'target' with its display label
df2['target'] = df2['target'].map(survival_labels)
df2.head()
| | source | target | value |
|---|---|---|---|
| 0 | female | Died | 16 |
| 1 | female | Survived | 40 |
| 2 | male | Died | 86 |
| 3 | male | Survived | 14 |
# Stack both aggregations vertically to build the 'definitive' dataset.
# Each frame keeps its own row labels, so index values repeat in the result.
link_frames = [df1, df2]
definitive = pd.concat(link_frames)
definitive
| | source | target | value |
|---|---|---|---|
| 0 | 1ª clase | female | 9 |
| 1 | 1ª clase | male | 21 |
| 2 | 2ª clase | female | 12 |
| 3 | 2ª clase | male | 18 |
| 4 | 3ª clase | female | 35 |
| 5 | 3ª clase | male | 61 |
| 0 | female | Died | 16 |
| 1 | female | Survived | 40 |
| 2 | male | Died | 86 |
| 3 | male | Survived | 14 |
# Collect the distinct node labels that appear in 'definitive'.
# The two label columns are flattened column by column ('F' order), so every
# 'source' label is visited before any 'target' label. The previous 'K' order
# followed the array's memory layout, which is an implementation detail of
# `.values` and could interleave both columns. pd.unique keeps the order of
# first appearance, so the node order is now deterministic.
label_values = definitive[['source', 'target']].values
unique_source_target = list(pd.unique(label_values.ravel('F')))
unique_source_target
['1ª clase', '2ª clase', '3ª clase', 'female', 'male', 'Died', 'Survived']
# Assign each node label its position in 'unique_source_target'
# (label -> integer index).
mapping_dict = dict(
    zip(unique_source_target, range(len(unique_source_target)))
)
mapping_dict
{'1ª clase': 0,
'2ª clase': 1,
'3ª clase': 2,
'female': 3,
'male': 4,
'Died': 5,
'Survived': 6}
# Replace the labels in both link columns with their integer node index
for link_column in ('source', 'target'):
    definitive[link_column] = definitive[link_column].map(mapping_dict)
definitive
| | source | target | value |
|---|---|---|---|
| 0 | 0 | 3 | 9 |
| 1 | 0 | 4 | 21 |
| 2 | 1 | 3 | 12 |
| 3 | 1 | 4 | 18 |
| 4 | 2 | 3 | 35 |
| 5 | 2 | 4 | 61 |
| 0 | 3 | 5 | 16 |
| 1 | 3 | 6 | 40 |
| 2 | 4 | 5 | 86 |
| 3 | 4 | 6 | 14 |
# Convert 'definitive' into a dict that maps each column name to a list
# of that column's values.
definitive_dict = definitive.to_dict(orient='list')
# Build the Sankey diagram with Plotly: first the node styling and labels,
# then the links given as parallel source/target/value lists.
sankey_node = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=unique_source_target,
    color="blue",
)
sankey_link = dict(
    source=definitive_dict["source"],
    target=definitive_dict["target"],
    value=definitive_dict["value"],
)
sankey_trace = go.Sankey(node=sankey_node, link=sankey_link)
fig = go.Figure(data=[sankey_trace])
# Set the figure title and font size, then render the Sankey diagram
layout_options = dict(
    title_text="Sankey Diagram de los supervivientes del Titanic",
    font_size=10,
)
fig.update_layout(**layout_options)
fig.show()